
*Read in the data (row 1 of the CSV supplies the variable names)
import delimited using data/nhgis0001_ds239_20185_tract.csv, varnames(1) clear

*Remove the Header
* (i.e. discard the first observation that follows the variable-name row)
drop if _n == 1

*Limit to Indiana
drop if state != "Indiana"

*Drop a bunch of variables
drop gisjoin regiona divisiona cousuba placea blkgrpa concita aianhha ///
	res_onlya trusta aitscea anrca cbsaa csaa metdiva nectaa cnectaa ///
	nectadiva uaa cdcurra sldua sldla zcta5a submcda sdelma sdseca ///
	sdunia puma5a bttra btbga

*Rename Variables
* ajwbe001 is the total population, ajwbe002 / ajwbe026 are the male / female
* totals, and each total is followed by 23 age-band columns for that sex.

rename ajwbe001 totpop
rename ajwbe002 totMale
rename ajwbe026 totFemale

* Age-band suffixes, listed in the order the columns are numbered
local bands LT5 5_9 10_14 15_17 18_19 20 21 22_24 25_29 30_34 35_39 40_44 ///
	45_49 50_54 55_59 60_61 62_64 65_66 67_69 70_74 75_79 80_84 gte85

* k tracks the numeric part of the ajwbe### name; it starts on the male total
local k = 2
foreach sex in m f {
	* step past the sex-total column (002 for males, 026 for females)
	local ++k
	foreach b of local bands {
		* zero-pad to three digits: 3 -> 003, 27 -> 027, ...
		local code : display %03.0f `k'
		rename ajwbe`code' `sex'_`b'
		local ++k
	}
}

save data/indianaCTract_age_sex.dta, replace

** Aggregate to a few broad age categories, by sex
use data/indianaCTract_age_sex.dta, clear

* Convert the age-band count columns to numeric so they can be summed
destring m_* f_*, replace

* Build four broad age-group totals per sex: n0_* for males, n1_* for females.
* The varlist ranges (a-b) are positional, so they rely on the age-band
* columns sitting in ascending age order in the dataset.
foreach s in m f {
	* d is the female indicator: 0 for "m", 1 for "f"
	local d = ("`s'" == "f")
	egen n`d'_1 = rowtotal(`s'_LT5-`s'_15_17)
	egen n`d'_2 = rowtotal(`s'_18_19-`s'_40_44)
	egen n`d'_3 = rowtotal(`s'_45_49-`s'_62_64)
	* Use the full variable name `s'_65_66: the shorthand `s'_65 only resolves
	* through variable-name abbreviation and errors under -set varabbrev off-
	egen n`d'_4 = rowtotal(`s'_65_66-`s'_gte85)
}

* Sum over all observations, leaving a single row of eight totals
collapse (sum) n0_1 n0_2 n0_3 n0_4 n1_1 n1_2 n1_3 n1_4

* First reshape: one row per age group (tt is a throw-away id for reshape)
gen tt = 1
reshape long n0_ n1_, i(tt) j(ageGroup)
drop tt

label define ag 1 "1. 0-17" 2 "2. 18-44" 3 "3. 45-64" 4 "4. 65+" 
label values ageGroup ag

* Second reshape: one row per age group x sex, with female = 0/1
rename n0_ n0 
rename n1_ n1
reshape long n, i(ageGroup) j(female)

save data/agg_group_size, replace


** Aggregate to state level, but keep age and sex categories
use data/indianaCTract_age_sex.dta, clear
destring m_* f_*, replace

* One row holding the sum of every age-band column over all observations
collapse (sum) m_* f_* 

* Replace each age-band suffix with a single numeric code so that
* -reshape long- can use it as the j() value. The two lists are parallel:
* the i-th band is renamed to the i-th code (LT5 -> 3, 5_9 -> 7, ...).
local bands LT5 5_9 10_14 15_17 18_19 20 21 22_24 25_29 30_34 35_39 40_44 ///
	45_49 50_54 55_59 60_61 62_64 65_66 67_69 70_74 75_79 80_84 gte85
local codes 3 7 12 16 18 20 21 23 27 32 37 42 ///
	47 52 57 61 63 66 68 72 77 82 88
foreach s in m f {
	local i = 0
	foreach b of local bands {
		local ++i
		local c : word `i' of `codes'
		rename `s'_`b' `s'`c'
	}
}

* state is only a constant id so reshape has an i() variable
gen state = "IN"
reshape long m f , i(state) j(age)
drop state 

* Stack the two sexes: female = 0 for the male counts, 1 for the female counts
rename m n0
rename f n1
reshape long n, i(age) j(female)

* Quick visual check: count by age code, one line per sex
twoway ///
	(line n age if female == 0) ///
	(line n age if female == 1)

rename age age_group
* Give four bands fractional codes (LT5, 18-19, 60-61 and 65-66).
* These must reference age_group: after the rename above, a bare "age" only
* works through variable-name abbreviation and is fragile.
replace age_group = 2.5 if age_group == 3
replace age_group = 18.5 if age_group == 18
replace age_group = 60.5 if age_group == 61
replace age_group = 65.5 if age_group == 66

save data/age_group_counts, replace 

* Continue with the data in memory, calling the band code "age" again
rename age_group age

* Assign each band to a wide age group, coded by the group's upper bound:
*   <0 -> 0, <18 -> 17, <30 -> 29, <50 -> 49, <65 -> 64, <75 -> 74,
*   any other non-missing age -> 75; a missing age stays missing.
generate age_wide = cond(age < 0, 0, ///
	cond(age < 18, 17, ///
	cond(age < 30, 29, ///
	cond(age < 50, 49, ///
	cond(age < 65, 64, ///
	cond(age < 75, 74, ///
	cond(missing(age), ., 75)))))))

* Total the counts within each wide group (sex is not in by(), so it is summed over)
collapse (sum) n, by(age_wide)
save data/age_wide_counts, replace 

* Frequency-weighted tabulation to show the distribution across groups
tabulate age_wide [fw=n] 


** Build a second set of wide age groups restricted to bands coded 12 and up
** (groups coded 17, 29, 49, 64, 74, 75)
use data/age_group_counts, clear

rename age_group age

* Discard every band whose code is below 12
drop if age < 12

** The band coded 12 covers ages 10-14; since only 12+ is wanted, keep
** three fifths of its count, truncated to an integer
replace n = int(3/5*n) if age == 12 

* Wide group coded by upper bound; a missing age stays missing
generate age_wide = cond(age < 18, 17, ///
	cond(age < 30, 29, ///
	cond(age < 50, 49, ///
	cond(age < 65, 64, ///
	cond(age < 75, 74, ///
	cond(missing(age), ., 75))))))

* Total counts per wide group and store under the 12+ specific name
collapse (sum) n, by(age_wide)
rename age_wide age_wide_12p
save data/age_wide_counts_12p, replace 
